package cn.doitedu.day02

import org.apache.spark.rdd.{PairRDDFunctions, RDD}
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Demonstrates `reduceByKey` on a pair RDD.
 *
 * Builds an in-memory list of `(word, 1)` pairs, distributes it over 4 partitions,
 * sums the values for each distinct word, and writes the resulting `(word, count)`
 * pairs as text files under `out/out14`.
 */
object T10_ReduceByKeyDemo {

  /**
   * Program entry point: runs the word-count style aggregation in local mode.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {

    // 1. Create the SparkConf: name the application after this demo and run it
    //    locally with 4 worker threads.
    val conf = new SparkConf().setAppName("ReduceByKeyDemo")
      .setMaster("local[4]")

    // 2. Create the SparkContext
    val sc = new SparkContext(conf)

    // Always release the context, even when the job fails, so that the local
    // executor threads and the Spark UI port are not left behind.
    try {
      val lst = List(
        ("spark", 1), ("hadoop", 1), ("hive", 1), ("spark", 1),
        ("spark", 1), ("flink", 1), ("hbase", 1), ("spark", 1),
        ("kafka", 1), ("kafka", 1), ("kafka", 1), ("kafka", 1),
        ("hadoop", 1), ("flink", 1), ("hive", 1), ("flink", 1)
      )
      // Create the RDD by parallelizing the local collection into 4 partitions
      val wordAndOne: RDD[(String, Int)] = sc.parallelize(lst, 4)

      // `reduceByKey` is not defined on RDD itself; it is a method of PairRDDFunctions,
      // which an RDD of key/value pairs is wrapped in. The explicit form would be:
      //   val pairRDDFunc: PairRDDFunctions[String, Int] = new PairRDDFunctions[String, Int](wordAndOne)
      //   val reduce: RDD[(String, Int)] = pairRDDFunc.reduceByKey(_ + _)
      val reduced: RDD[(String, Int)] = wordAndOne.reduceByKey(_ + _)

      // Triggers the job and writes the (word, count) pairs under this directory.
      reduced.saveAsTextFile("out/out14")
    } finally {
      sc.stop()
    }
  }

}